# Boyoon Lee "The Impact of Educational Content on Anti-Immigrant Attitudes"
# 4. Online Appendix D (Figure D.5, Table D.5, Table D.6)
# Last updated: 2022-11-16


# Initial settings --------------------------------------------------------

# Set directory
setwd("Your Path Here")

# Packages
library(ggplot2) 
library(tidyr)
library(dplyr)
library(miceadds) # for lm.cluster
library("MatchIt"); library("cobalt"); library("cem") # for matching


# Load data  --------------------------------------------------------------
data <- read.csv("./cleaned_TSCS_did.csv", header=TRUE, sep=",")


# Recode / Re-class  ------------------------------------------------------

### Make date variable using Birth year and Birth month
# Since there is no date of birth available, I set the date to the first day of the month
data$date <- as.Date(with(data, paste(birth_yr, birth_month, "01", sep="-")), "%Y-%m-%d")

### Only leave school cohorts before 2002 
# In 2001, Knowing Taiwan series discontinued and replaced with the New Grade 1-9 curriculum (9 year curriculum)
# In addition, from 1968, junior vocational school shut down and expanded senior vocational school
data<-subset(data, data$school_yr<2002&data$school_yr>1967)


# Subsets for the Comparison Group ---------------------------------------

# Main comparison group 
yr13_22<-subset(data,school_yr>1987 & school_yr<2002)
yr13_22<-subset(yr13_22,edu_level>3) # those who graduated from at least junior high schools




#############################################################
##              [APPENDIX D]  Matching                     ##
#############################################################


# Preparation  -----------------------------------------------------------

# Obs with complete cases
complete<-yr13_22 %>% 
  select(num_imm,post,group,group_plus,female,married,employed,social_class,edu_uni,ba,
         edu_level,loc_birth,school_yr,year,prts_gen,date)
complete <- complete[complete.cases(complete), ] 
cov<-c('female','married','employed','social_class','edu_uni','prts_gen',
       'loc_birth','school_yr','year')


# Matching  -------------------------------------------------------------
# NOTE: The following set.seed() was conducted under R 3.5.1 
# Users with R version higher than 3.6.0 will yield different results 
# because sample() algorithm for the set.seed() function has changed from R version 3.6.0.


### Load the previously saved matched data
# Matched using all imbalanced variables
load("./matched_data_Ver3_5_1.Rda")
# Matched using pre-treatment covariates
load("./matched_data_pre_Ver3_5_1.Rda")


### The above matched data sets are saved using the following codes
## On those with imbalance
#set.seed(1000)
#m.out <- matchit(group ~ married + employed + edu_uni + prts_gen + loc_birth + school_yr, 
#                 data = complete, method = "nearest", 
#                 replace =  FALSE, ratio =1, caliper=0.1, m.order="random")
#sum.m.out<-summary(m.out)
## Save matched data 
#dta_m <- match.data(m.out)
#dta_m$group = ifelse(dta_m$group == 0,"Control", "Treatment")
#save(dta_m, m.out, sum.m.out, file="./matched_data_Ver3_5_1.Rda")

## On Pre-treatment covariates
#set.seed(1000)
#m.out.pre <- matchit(group ~ female + prts_gen + loc_birth + school_yr + year,
#                 data = complete, method = "nearest", 
#                 replace =  FALSE, ratio =1, caliper=0.1, m.order="random")
#sum.m.out.pre<-summary(m.out.pre)
## Save matched data
#dta_m.pre <- match.data(m.out.pre)
#dta_m.pre$group = ifelse(dta_m.pre$group == 0,"Control", "Treatment")
#save(dta_m.pre, m.out.pre, sum.m.out.pre, file="./matched_data_pre_Ver3_5_1.Rda")


# Figure D.5  -------------------------------------------------------------

# Plot for balance checking
v<-data.frame(old=c('female','married','employed','social_class','edu_uni','prts_gen',
                    'loc_birth','school_yr'),
              new=c("Female","Married","Employed","social class","University graduate","Parents: academic",
                    "Location of birth","Cohort year"))
love.plot(bal.tab(m.out,m.threshold=0.1,binary="std"),
          stat="mean.diffs",var.names=v,abs=F,line=TRUE,
          shapes= c("circle", "triangle filled"),
          colors=c("black","gray"))


# Table D.5  -------------------------------------------------------------

# Group means for matched data
# Before matching
lapply(cov, function(v) {
  t.test(complete[, v] ~ complete$group)
})
# After matching
lapply(cov, function(v) {
  t.test(dta_m[, v] ~ dta_m$group)
})


# Table D.6  -------------------------------------------------------------

# (1): Matched with pre-treatment covariates
psm.cl.2.pre <- lm.cluster(data= dta_m.pre,
                           formula= num_imm ~ post*group
                           + female + married + employed + social_class+ edu_uni
                           + post*prts_gen 
                           + as.factor(loc_birth)+ as.factor(edu_level) +as.factor(school_yr) + as.factor(year),
                           cluster = dta_m.pre$school_yr*dta_m.pre$group_plus)
summary(psm.cl.2.pre)

# (2): Matched with pre- and post-treatment covariates
psm.cl.2 <- lm.cluster(data= dta_m,
                       formula= num_imm ~ post*group
                       + female + married + employed + social_class+ edu_uni
                       + post*prts_gen 
                       + as.factor(loc_birth)+ as.factor(edu_level) +as.factor(school_yr) + as.factor(year),
                       cluster = dta_m$school_yr*dta_m$group_plus)
summary(psm.cl.2)

